I'm gonna overwrite a lot of this notebook's old content. I changed the way I'm calculating wt, and wanna test that my training worked.



In [1]:

    
from pearce.emulator import OriginalRecipe, ExtraCrispy
from pearce.mocks import cat_dict
import numpy as np
from os import path



In [2]:

    
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()



In [3]:

    
%%bash
ls ~/des/Pearce*.hdf5









    



/u/ki/swmclau2/des/PearceRedMagicWpCosmo.hdf5
/u/ki/swmclau2/des/PearceRedMagicWpCosmo2.hdf5
/u/ki/swmclau2/des/PearceRedMagicWpCosmoTest.hdf5
/u/ki/swmclau2/des/PearceRedMagicXiChinchilla.hdf5
/u/ki/swmclau2/des/PearceRedMagicXiCosmo.hdf5
/u/ki/swmclau2/des/PearceRedMagicXiCosmoTest.hdf5
/u/ki/swmclau2/des/PearceTrainerTest1.hdf5



In [4]:

    
training_file = '/u/ki/swmclau2/des/PearceRedMagicXiCosmo.hdf5'
test_file = '/u/ki/swmclau2/des/PearceRedMagicXiCosmoTest.hdf5'
em_method = 'gp'
split_method = 'random'



In [5]:

    
a = 1.0
z = 1.0/a - 1.0



In [6]:

    
fixed_params = {'z':z, 'r':24.06822623}

n_leaves, n_overlap = 10, 1 emu = ExtraCrispy(training_file, n_leaves, n_overlap, split_method, method = em_method, fixed_params=fixed_params, custom_mean_function = None, downsample_factor = 1.0)



In [7]:

    
emu = OriginalRecipe(training_file, method = em_method, fixed_params=fixed_params,\
                     downsample_factor=1.0)#,
                    #hyperparams = {'n_estimators': 500,
                    #              'max_depth': 5})









    



/u/ki/swmclau2/.local/lib/python2.7/site-packages/pearce/emulator/emu.py:262: UserWarning: WARNING: NaN detected. Skipped 12788 points in training data.
  warnings.warn('WARNING: NaN detected. Skipped %d points in training data.' % (num_skipped))



In [8]:

    
emu.get_param_names()









    Out[8]:





['ombh2',
 'omch2',
 'w0',
 'ns',
 'ln10As',
 'H0',
 'Neff',
 'logM0',
 'sigma_logM',
 'logMmin',
 'logM1',
 'alpha']



In [9]:

    
import h5py
f = h5py.File(training_file, 'r')



In [10]:

    
np.array(f['cosmo_no_00/a_1.000/obs']).shape









    Out[10]:





(1000, 18)



In [11]:

    
emu.x.shape[0] + 12788









    Out[11]:





40000



In [12]:

    
for pname in emu.get_param_names():
    print pname, emu.get_param_bounds(pname)









    



ombh2 (0.02066455, 0.02371239)
omch2 (0.10121810000000001, 0.13177679999999997)
w0 (-1.399921, -0.56584860000000003)
ns (0.92784619999999995, 0.99744959999999994)
ln10As (3.0009000000000001, 3.179424)
H0 (61.694719999999997, 74.76751999999999)
Neff (2.6212499999999999, 4.2787499999999996)
logM0 (13.1, 16.100000000000001)
sigma_logM (0.050000000000000003, 0.29999999999999999)
logMmin (13.1, 14.1)
logM1 (13.1, 15.1)
alpha (0.80000000000000004, 1.2)

names = ['amp'] names.extend(emu.get_param_names()) dict(zip(names, np.array([1.282969797894147446e+00, 8.951664721378283575e+05, 7.830465404301183298e+04, 5.388438442608223624e-05, 4.398317466650228198e+03, 3.169582088726115320e-04, 1.253707987440919602e-01, 9.682153059967076467e-06, 2.348234600554276300e-05, 1.009271514630569800e+04, 2.837303616516211350e-04, 1.208270935044775918e-05, 5.004939102700953768e-01])))

v = np.array([-2.76674688, 39.90074862, 41.97733577, 34.908274 , 25.45566919, 32.9653712 , 45.25126079, 44.36071403, 43.39705654, 31.30384076, 29.14249287, 33.35982685, 68.25178863, 55.50543659, 0.5289175 , 3.26280959, 4.88974786, 5.20163106, 3.57189386, 4.0222253 , 35.7317967 , 3.04556806, 2.37166337, 1.22982813, 3.15514574, 1.06641224, 31.83494205, 0.12732846, 2.85868658])

v = np.array([-3.49251486e+00, 2.56895286e+02, 6.34889263e+02, 4.23540423e+00, 3.06144854e+01, 5.32898437e+02, 4.65947388e+00, 2.10953081e-01, 3.69172960e+02, 1.85079107e+02, 3.14421111e+02, 3.45236442e+02, 2.62822319e+02, 4.87881663e+02, 1.61183292e+00, 8.53907861e+02, 9.39441074e+01, 2.41645851e+01, 8.46587668e+02, 3.01748847e+02, 1.81427178e+00, 1.72439999e+00, 4.14188992e+00, 3.80508709e+00, -1.63855507e+00, 3.42620859e+02, 3.57459354e+00])

v = np.array([-2.51875005e+00, 3.99882406e+02, 2.59229552e+01, 2.91540851e+01, 1.39254458e+02, 8.12407915e+01, 2.49876072e+02, 2.74786641e+02, 4.56201422e+03, 6.03402392e+03, 8.23854446e+03, 6.63104020e+03, 5.20905761e+03, 1.94554178e+02, -9.88314607e-01, -1.11352182e+00, 2.77140729e+01, 2.82694588e+01, 1.17727291e+02, 1.27938574e+02, -2.12175268e+00, -1.49398245e+00, -2.46909795e+00, -8.50671268e-01, -3.76457784e+00])

v = np.array([-107.63201595, -3.00623478, 1.30030219, 73.91837015, -1.24519356, 61.31984424, -2.73478191, 80.84539419, 86.27226179, -16.47626201, -30.44683545, -35.42641201, -12.96207967, -43.95549179, -3.36130433, 37.9852565 , 3.28282274, 57.71964132, 105.93552856, 30.52956235, 81.30883655, 46.50496736, -4.63570007, 2.25765005, -11.86949411, 83.41893473, 75.44653974])

v = np.array([ -3.70809017, -3.44052739, 13.41401445, 10.36063547, -9.56185908, 8.74668577, -7.97667636, -1.31782856, -9.98796727, -9.63452286, 7.68799877, -7.48368788, -11.23455619, 1.13359274, -3.41003473, 12.78588911, 9.81411492, -9.12662602, 8.91865132, -7.61937602, -1.7967975, -11.04892756, -9.49986017, 9.41407695, -8.69062493, -11.28011457, -1.44207183] )



In [13]:

    
emu.get_param_names()









    Out[13]:





['ombh2',
 'omch2',
 'w0',
 'ns',
 'ln10As',
 'H0',
 'Neff',
 'logM0',
 'sigma_logM',
 'logMmin',
 'logM1',
 'alpha']

#zhongxu kenrel param_names = ['Omegam', 'Omegab', 'sigma_8', 'h', 'n_s', 'Neff', 'w', 'M_sat', 'alpha', 'Mcut', 'sigma_logM', 'eta_con', 'eta_vc', 'eta_vs', 'gamma_f'] v = np.array([ 0.2661017, 0.1054246, 1.1295944, 0.3643993, 0.2408568, 11.5649985, 5.6407612, 4.9071932, 10.6279446, 11.7621938, 4.7031938, 6.3770235, 11.7578699, 11.7547548, 8.4866085,/ -12.0550382, 1.8339794, 10.6161248, 2.2441632, 13.8155106, 10.6371797, 11.3512804, 7.342365 , 3.1795786, 3.7658774, 5.0188608, 4.6846614, 13.8155106, 13.8155106, 5.545777 , 13.8155106,\ -1.5383083, -13.8155106])

print len(param_names) print v.shape



In [14]:

    
zhongzhu_dict = {'omch2':[0.2661017,1.8339794 ], 'ombh2':[0.1054246,10.6161248], 'ln10As':[1.1295944,2.2441632],\
                 'H0':[0.3643993,13.8155106],\
                'ns':[0.2408568,10.6371797], 'Neff':[11.5649985,11.3512804], 'w0':[5.6407612,7.342365 ],\
                 'logM0': [4.9071932,3.1795786,],\
                 'alpha':[10.6279446,3.7658774], 'logM1':[11.7621938,5.0188608], 'sigma_logM':[4.7031938, 4.6846614], 'logMmin':[1.0, 1.0],
                'amp':[-12.0550382, 0.0,-1.5383083], 'r':[0.0, 0.0]}

names = ['amp']
names.extend(emu.get_param_names())
from itertools import cycle
names = cycle(names)
amp_count = 0
v = []
for n in names:
    if n== 'amp':
        amp_count+=1
    v.append(zhongzhu_dict[n][amp_count-1]) #this is a poison hack dont judge me
    #v.append(zhongzhu_dict[n][amp_count]) #this is a poison hack dont judge me

    if amp_count==3:
        break
        
v = np.array(v)
print v









    



[-12.0550382   0.1054246   0.2661017   5.6407612   0.2408568   1.1295944
   0.3643993  11.5649985   4.9071932   4.7031938   1.         11.7621938
  10.6279446   0.         10.6161248   1.8339794   7.342365   10.6371797
   2.2441632  13.8155106  11.3512804   3.1795786   4.6846614   1.
   5.0188608   3.7658774  -1.5383083]

zhongzhu_dict = {'omch2':[0.2661017,1.8339794 ], 'ombh2':[0.1054246,10.6161248], 'ln10As':[1.1295944,2.2441632],\ 'H0':[0.3643993,13.8155106],\ 'ns':[0.2408568,10.6371797], 'Neff':[11.5649985,11.3512804], 'w0':[5.6407612,7.342365 ],\ 'logM0': [4.9071932,3.1795786,],\ 'alpha':[10.6279446,3.7658774], 'logM1':[11.7621938,5.0188608], 'sigma_logM':[4.7031938, 4.6846614], 'logMmin':[1.0, 1.0], 'amp':[-12.0550382, 0.0], 'r':[0.0, 0.0]} names = ['amp'] names.extend(emu.get_param_names()) from itertools import cycle names = cycle(names) amp_count = 0 v = [-1.5383083] for n in names: if n== 'amp': amp_count+=1 if amp_count==3: break v.append(zhongzhu_dict[n][amp_count-1]) #this is a poison hack dont judge me #v.append(zhongzhu_dict[n][amp_count]) #this is a poison hack dont judge me v = np.array(v)

v = np.array([-1.59585658e+02, -4.28609431e+00, 5.26421111e+00, 2.23600305e+00, 3.31933458e+00, 3.94369625e+00, 5.49058429e+00, 1.18077464e+02, 1.39672483e+00, 6.05653776e+00, 8.85934071e+01, 5.02868853e+00, 6.16913182e+00, 9.16003441e+02, -2.56178530e+00, 2.34222918e+00, -3.01172284e-01, 4.39990318e-01, 1.54266085e+00, 1.48328501e+02, 9.24908910e-01, 1.67154090e+00, 1.48260489e+00, 1.27269583e+00, -8.56570699e+00, -2.53310968e+00, -4.39899560e+00])



In [15]:

    
emu._emulator.set_parameter_vector(v)



In [16]:

    
v









    Out[16]:





array([-12.0550382,   0.1054246,   0.2661017,   5.6407612,   0.2408568,
         1.1295944,   0.3643993,  11.5649985,   4.9071932,   4.7031938,
         1.       ,  11.7621938,  10.6279446,   0.       ,  10.6161248,
         1.8339794,   7.342365 ,  10.6371797,   2.2441632,  13.8155106,
        11.3512804,   3.1795786,   4.6846614,   1.       ,   5.0188608,
         3.7658774,  -1.5383083])



In [17]:

    
print emu._emulator.kernel









    



ConstantKernel(log_constant=-12.0550382, ndim=12, axes=array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])) + ConstantKernel(log_constant=0.1054246, ndim=12, axes=array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])) * ExpSquaredKernel(metric=Metric(array([  1.30486776e+00,   2.81677049e+02,   1.27233882e+00,
         3.09440116e+00,   1.43964895e+00,   1.05345268e+05,
         1.35259235e+02,   1.10298883e+02,   2.71828183e+00,
         1.28308628e+05,   4.12722071e+04,   1.00000000e+00]), ndim=12, axes=array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11]), bounds=[(None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None)]), block=None) + ConstantKernel(log_constant=10.6161248, ndim=12, axes=array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])) * Matern32Kernel(metric=Metric(array([  6.25874321e+00,   1.54436021e+03,   4.16551254e+04,
         9.43251912e+00,   1.00000004e+06,   8.50743109e+04,
         2.40366224e+01,   1.08273605e+02,   2.71828183e+00,
         1.51238914e+02,   4.32015943e+01,   2.14744077e-01]), ndim=12, axes=array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11]), bounds=[(None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None)]), block=None)

v = np.exp(np.array([-28.12679127, -2.48975207, 36.20587307, 0.83464329, 0.41127875, 21.71943652, -1.10745401, 3.49646035, 29.50413625, -2.76403498, -0.35738083, -3.47478515, -1.62618499, -2.76897448]))

emu._emulator.set_parameter_vector(v)

print emu._emulator.kernel



In [18]:

    
emu.scale_bin_centers









    Out[18]:





array([  0.09581734,   0.13534558,   0.19118072,   0.27004994,
         0.38145568,   0.53882047,   0.76110414,   1.07508818,
         1.51860241,   2.14508292,   3.03001016,   4.28000311,
         6.04566509,   8.53972892,  12.06268772,  17.0389993 ,
        24.06822623,  33.99727318])



In [19]:

    
emu.n_bins









    Out[19]:





1



In [20]:

    
#print emu.x.shape
#print emu.downsample_x.shape
if hasattr(emu, "_emulators"):
    print emu._emulators[0]._x.shape
else:
    print emu._emulator._x.shape









    



(27212, 12)



In [21]:

    
emu._ordered_params









    Out[21]:





OrderedDict([('ombh2', (0.02066455, 0.02371239)),
             ('omch2', (0.10121810000000001, 0.13177679999999997)),
             ('w0', (-1.399921, -0.56584860000000003)),
             ('ns', (0.92784619999999995, 0.99744959999999994)),
             ('ln10As', (3.0009000000000001, 3.179424)),
             ('H0', (61.694719999999997, 74.76751999999999)),
             ('Neff', (2.6212499999999999, 4.2787499999999996)),
             ('logM0', (13.1, 16.100000000000001)),
             ('sigma_logM', (0.050000000000000003, 0.29999999999999999)),
             ('logMmin', (13.1, 14.1)),
             ('logM1', (13.1, 15.1)),
             ('alpha', (0.80000000000000004, 1.2))])

x, y, y_pred = emu.goodness_of_fit(training_file, statistic = 'log_frac')

x, y, y_pred

N = 25 for _y, yp in zip(y[:N], y_pred[:N]): #plt.plot(emu.scale_bin_centers , (_y - yp)/yp ,alpha = 0.3, color = 'b') plt.plot(emu.scale_bin_centers, _y, alpha = 0.3, color = 'b') plt.plot(emu.scale_bin_centers, yp, alpha = 0.3, color = 'r') plt.loglog();

%%timeit #truth_file = '/u/ki/swmclau2/des/PearceRedMagicWpCosmoTest.hdf5' gof = emu.goodness_of_fit(training_file, statistic = 'log_frac')

gof = emu.goodness_of_fit(training_file, statistic = 'log_frac') print gof.mean()

gof = emu.goodness_of_fit(training_file, statistic = 'frac') print gof.mean()

plt.hist(np.log10(gof) );



In [22]:

    
from sklearn.model_selection import train_test_split



In [23]:

    
x, y, yerr = emu.x, emu.y, emu.yerr
downsample_idxs = np.random.choice(x.shape[0], size = int(0.08*x.shape[0]), replace = False)
x,y, yerr = x[downsample_idxs, :], y[downsample_idxs], yerr[downsample_idxs]

train_x, test_x, train_y, test_y, train_yerr, test_yerr = train_test_split(x, y, yerr, test_size = 0.1)



In [24]:

    
model = emu._emulator
model.compute(train_x, train_yerr)



In [25]:

    
pred_y = model.predict(train_y, test_x, False, False, False)*emu._y_std + emu._y_mean



In [26]:

    
np.mean(np.abs((pred_y-test_y)/test_y))









    Out[26]:





8.8897934204278783



In [20]:

    
model = emu._emulator
ypred = model.predict(emu.y, emu.x, False, False, False)*emu._y_std+emu._y_mean

for idx in xrange(50): plt.plot(emu.scale_bin_centers, ypred[idx*emu.n_bins:(idx+1)*emu.n_bins], label = 'Emu') plt.plot(emu.scale_bin_centers, emu.y[idx*emu.n_bins:(idx+1)*emu.n_bins], label = 'True') plt.title(np.sum(emu.x[(idx+1)*emu.n_bins, :-1]) ) plt.legend(loc='best') plt.xscale('log') plt.show()



In [21]:

    
resids = np.abs(emu.y*emu._y_std+emu._y_mean - ypred)



In [22]:

    
np.mean(resids/(emu.y*emu._y_std+emu._y_mean))









    Out[22]:





1.6006262671259795e-06



In [23]:

    
ypred.mean(), emu._y_mean









    Out[23]:





(-0.20411536519797333, 0.0)

plt.plot(emu.scale_bin_centers, np.abs(gof.mean(axis = 0)) ) plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.01) plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.05) plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.1) plt.loglog();

plt.plot(emu.scale_bin_centers, np.abs(gof.T),alpha = 0.1, color = 'b') plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.01, lw = 2, color = 'k') plt.loglog();



In [24]:

    
test_gof = emu.goodness_of_fit(test_file, statistic = 'log_frac')
print test_gof.mean()









    



3.93995000765






    



/u/ki/swmclau2/.local/lib/python2.7/site-packages/pearce/emulator/emu.py:265: UserWarning: WARNING: NaN detected. Skipped 2232 points in training data.
  warnings.warn('WARNING: NaN detected. Skipped %d points in training data.' % (num_skipped))



In [25]:

    
test_gof = emu.goodness_of_fit(test_file, statistic = 'frac')
print test_gof.mean()









    



0.495564065639



In [26]:

    
plt.hist(np.log10(test_gof));



In [27]:

    
test_x, test_y, test_yerr, _ = emu.get_data(test_file,fixed_params, None)



In [28]:

    
test_x









    Out[28]:





array([[  0.0232629 ,   0.10783   ,  -0.726513  , ...,  13.45135135,
         13.60850851,   0.95335335],
       [  0.0232629 ,   0.10783   ,  -0.726513  , ...,  13.58648649,
         14.1990991 ,   0.88968969],
       [  0.0232629 ,   0.10783   ,  -0.726513  , ...,  13.81171171,
         14.45335335,   1.17677678],
       ..., 
       [  0.0227629 ,   0.12033   ,  -0.90392   , ...,  13.76966967,
         13.5964965 ,   0.82402402],
       [  0.0227629 ,   0.12033   ,  -0.90392   , ...,  14.03993994,
         14.1950951 ,   1.03383383],
       [  0.0227629 ,   0.12033   ,  -0.90392   , ...,  13.5964965 ,
         13.56646647,   0.92452452]])



In [29]:

    
(emu.x*emu._x_std) + emu._x_mean









    Out[29]:





array([[  0.02268302,   0.11405866,  -0.81658903, ...,  13.55632089,
         14.76952197,   0.86485622],
       [  0.02268302,   0.11405866,  -0.81658903, ...,  13.7445072 ,
         13.62038432,   1.02661636],
       [  0.02268302,   0.11405866,  -0.81658903, ...,  13.56232684,
         14.05081044,   1.12151031],
       ..., 
       [  0.0217405 ,   0.1200688 ,  -0.94104349, ...,  13.53129612,
         13.60637044,   0.86605741],
       [  0.0217405 ,   0.1200688 ,  -0.94104349, ...,  14.03679657,
         14.30906612,   1.15874717],
       [  0.0217405 ,   0.1200688 ,  -0.94104349, ...,  13.9116727 ,
         13.2560236 ,   0.80799993]])



In [30]:

    
emu.get_param_names()









    Out[30]:





['ombh2',
 'omch2',
 'w0',
 'ns',
 'ln10As',
 'H0',
 'Neff',
 'logM0',
 'sigma_logM',
 'logMmin',
 'logM1',
 'alpha']



In [31]:

    
test_x_white, test_y_white = (test_x - emu._x_mean)/(emu._x_std + 1e-5), (test_y - emu._y_mean)/(emu._y_std + 1e-5)



In [32]:

    
model = emu._emulator



In [33]:

    
pred_y_white = model.predict(emu.y, test_x_white, False, False, False)



In [34]:

    
pred_y = pred_y_white*emu._y_std + emu._y_mean



In [35]:

    
plt.plot(pred_y[:100], label = 'pred')
plt.plot(test_y[:100], label = 'truth')

plt.legend(loc = 'best')









    Out[35]:





<matplotlib.legend.Legend at 0x7f4635300ed0>



In [36]:

    
test_y.mean(), emu._y_mean, pred_y.mean()









    Out[36]:





(-0.20508568125550344, 0.0, -0.22781020071041644)



In [37]:

    
test_y.std(), emu._y_std, pred_y.std()









    Out[37]:





(0.18713508718638672, 1.0, 0.18363598488483013)



In [38]:

    
plt.hist(pred_y_white, bins = np.linspace(-3, 3, 100), label = 'Pred')
plt.hist(test_y_white, bins = np.linspace(-3, 3, 100), label = 'Test', alpha = 0.4);
plt.legend(loc = 'best')









    Out[38]:





<matplotlib.legend.Legend at 0x7f46353cda10>



In [ ]:



In [ ]: